import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from scipy.stats import chi2_contingency
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Filter the data for 'Punggol' Planning Area
punggol_data = df[df['Planning Area'] == 'Punggol']
# Create a plot for 'Punggol'
fig = px.bar(punggol_data,
y='Age Group',
x='Resident Count',
color='Subzone',
labels={'Age Group': 'Age Group', 'Resident Count': 'Resident Count'},
title='Distribution of Resident Count in Punggol')
fig.update_layout(yaxis={'categoryorder': 'array', 'categoryarray': ['0 - 4', '5 - 9', '10 - 14', '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '40 - 44',
'45 - 49', '50 - 54', '55 - 59', '60 - 64', '65 - 69', '70 - 74', '75 - 79', '80 - 84',
'85 - 89', '90 & Over']},
yaxis_title='Age Group',
xaxis_title='Resident Count')
fig.show()
# Assuming you have a DataFrame named 'punggol_data' with the columns 'Age Group' and 'Type of Dwelling'
# Create a contingency table
contingency_table = pd.crosstab(punggol_data['Age Group'], punggol_data['Type of Dwelling'])
# Perform chi-square test
chi2, p_value, _, _ = chi2_contingency(contingency_table)
# Print the chi-square statistic and p-value
print("Chi-square statistic:", chi2)
print("p-value:", p_value)
Chi-square statistic: 0.0 p-value: 1.0
alpha = 0.05
if p_value < alpha:
print("There is a significant relationship between Type of Dwelling and Age Group in Punggol.")
else:
print("There is no significant relationship between Type of Dwelling and Age Group in Punggol.")
There is no significant relationship between Type of Dwelling and Age Group in Punggol.
Overall, the results suggest that there is no significant association between the 'Age Group' and 'Type of Dwelling' variables in Punggol, as indicated by the low chi-square statistic, high p-value.
# Filter the data for 'AMK' Planning Area
amk_data = df[df['Planning Area'] == 'Ang Mo Kio']
# Create a plot for 'Punggol'
fig = px.bar(amk_data,
y='Age Group',
x='Resident Count',
color='Subzone',
labels={'Age Group': 'Age Group', 'Resident Count': 'Resident Count'},
title='Distribution of Resident Count in Ang Mo Kio')
fig.update_layout(yaxis={'categoryorder': 'array', 'categoryarray': ['0 - 4', '5 - 9', '10 - 14', '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '40 - 44',
'45 - 49', '50 - 54', '55 - 59', '60 - 64', '65 - 69', '70 - 74', '75 - 79', '80 - 84',
'85 - 89', '90 & Over']},
yaxis_title='Age Group',
xaxis_title='Resident Count')
fig.show()
# Create a contingency table
contingency_table = pd.crosstab(amk_data['Age Group'], amk_data['Type of Dwelling'])
# Perform chi-square test
chi2, p_value, _, _ = chi2_contingency(contingency_table)
# Print the chi-square statistic and p-value
print("Chi-square statistic:", chi2)
print("p-value:", p_value)
if p_value < alpha:
print("There is a significant relationship between Type of Dwelling and Age Group in Ang Mo Kio.")
else:
print("There is no significant relationship between Type of Dwelling and Age Group in Ang Mo Kio.")
Chi-square statistic: 0.0 p-value: 1.0 There is no significant relationship between Type of Dwelling and Age Group in Ang Mo Kio.
Overall, the results suggest that there is no significant association between the 'Age Group' and 'Type of Dwelling' variables in Ang Mo Kio, as indicated by the low chi-square statistic, high p-value.